suppressPackageStartupMessages({
library(Seurat)
library(venn)
library(dplyr)
library(cowplot)
library(ggplot2)
library(pheatmap)
library(enrichR)
library(rafalib)
library(cowplot)
library(gplots)
})
plottheme = theme(legend.title = element_text(size = 8),
legend.text = element_text(size = 8),
axis.title = element_text(size = 8),
axis.text = element_text(size = 8),
plot.title = element_text(size = 8))
# library(future)
# plan("multiprocess", workers = 4)
# options(future.globals.maxSize = 24000 * 1024^2)
alldata <- readRDS("../analysis/filtered_IMM_DN_int_clus.rds")
# Set desired clustering resolution
sel.lev = 0.25
# Set the identity as louvain with resolution selected above
sel.clust = paste0("CCA_snn_res.",sel.lev)
alldata@active.assay = "RNA"
alldata$orig.ident = factor(alldata$orig.ident,
levels = c("STD_DN_d0","GW_DN_d0",
"STD_IMM_d0","GW_IMM_d0"))
alldata$Treatment = factor(alldata$Treatment, levels = c("STD","GW"))
alldata <- SetIdent(alldata, value = sel.clust)
table(alldata@active.ident)
alldata$ident_clust = paste0(alldata@active.ident,"_",alldata$orig.ident)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12
## 11520 7328 6802 5322 5179 4375 3126 2667 2616 2198 1918 1620 1546
## 13 14 15 16
## 1492 1312 567 289
plot_grid(ncol = 2, DimPlot(alldata, label = T, shuffle = TRUE) +
NoAxes() + NoLegend(),
DimPlot(alldata, group.by = "orig.ident", shuffle = TRUE) +
NoAxes() + theme(legend.position = "bottom") +
guides(color=guide_legend(ncol=2,override.aes = list(size = 2))))
ggplot(alldata@meta.data, aes_string(fill= sel.clust, x = "Treatment")) + plottheme +
geom_bar(position = "fill", color = "black") + facet_wrap(~Type) +
theme(panel.background = element_blank(),axis.ticks = element_blank(), axis.title.y = element_blank())
frequencies = as.data.frame(table(data.frame(cluster=alldata@meta.data[,sel.clust],
sample = alldata$orig.ident, treatment = alldata$Treatment,
type = alldata$Type)))
frequencies$Proportion=NA
for(i in 1:nrow(frequencies)){
frequencies$Proportion[i] = frequencies$Freq[i]/sum(frequencies$Freq[frequencies$sample==frequencies$sample[i]])
}
ggplot(frequencies, aes(fill=treatment, y = Proportion, x = cluster)) + plottheme +
geom_bar(stat = "identity",position="dodge") + facet_wrap(~type) +
theme(panel.background = element_blank(),axis.ticks.y = element_blank())
DotPlot(alldata, features = c("Ptprc","Epcam"),
assay = "RNA") + coord_flip() + plottheme
Top 25 (by p-value) genes with logFC>0.2 for each cluster, sorted by logFC:
if(file.exists(paste0("../analysis/clustermarkers_IMM_DN_CCA_",sel.lev,".rds"))){
markers_genes = readRDS(paste0("../analysis/clustermarkers_IMM_DN_CCA_",sel.lev,".rds"))
}else{
markers_genes <- FindAllMarkers(alldata, logfc.threshold = 0.2, test.use = "wilcox",
min.pct = 0.1, min.diff.pct = 0.2, only.pos = TRUE, max.cells.per.ident = 300,
assay = "RNA", random.seed = 42)
saveRDS(markers_genes, paste0("../analysis/clustermarkers_IMM_DN_CCA_",sel.lev,".rds"))
}
top25 <- markers_genes %>% group_by(cluster) %>% top_n(-25, p_val)
top25 <- top25 %>% group_by(cluster) %>% top_n(25, avg_log2FC)
mypar(2, 5, mar = c(4, 6, 3, 1))
for (i in unique(top25$cluster)) {
barplot(sort(setNames(top25$avg_log2FC, top25$gene)[top25$cluster == i], F), horiz = T,
las = 1, main = paste0(i, " vs. rest"), border = "white", yaxs = "i")
abline(v = c(0, 0.25), lty = c(1, 2))
}
Top 5 (by p-value) genes with logFC>0.2 for each cluster:
top5 <- markers_genes %>% group_by(cluster) %>% top_n(-5, p_val_adj)
top5 <- top5 %>% group_by(cluster) %>% top_n(5, avg_log2FC)
alldata <- ScaleData(alldata, features = as.character(unique(top5$gene)), assay = "RNA")
DoHeatmap(alldata, features = as.character(unique(top5$gene)), group.by = sel.clust,
assay = "RNA") + plottheme
#pdf(paste0("../figures/clustermarkers_IMM_DN_",sel.lev,".pdf"), width = 10, height = 10)
#DoHeatmap(alldata, features = as.character(unique(top5$gene)), assay ="RNA",label = FALSE) + labs(color = "Cluster")
#dev.off()
DotPlot(alldata, features = rev(as.character(unique(top5$gene))),
assay = "RNA") + coord_flip() + plottheme
# topAll = markers_genes %>% group_by(cluster)
# DoHeatmap(alldata, features = as.character(unique(topAll$gene)), assay ="RNA",label = FALSE) + labs(color = "Cluster")
# DoHeatmap(alldata, features = as.character(unique(top5$gene)), group.by = sel.clust,
# assay = "RNA", label = FALSE)
#
# DotPlot(alldata, features = rev(as.character(unique(top5$gene))), group.by = sel.clust,
# assay = "RNA") + coord_flip()
Top 3 (by p-value) genes with logFC>0.2 for each cluster:
top3 <- top5 %>% group_by(cluster) %>% top_n(-3, p_val)
top3 <- top3 %>% group_by(cluster) %>% top_n(3, avg_log2FC)
VlnPlot(alldata, features = as.character(unique(top3$gene)), ncol = 4, group.by = sel.clust,
assay = "RNA", pt.size = -1)
for(g in as.character(unique(top3$gene))){
print(FeaturePlot(alldata, features = g, slot = "data") +
NoAxes() + plottheme + theme(legend.key.width = unit(5, "pt"),
legend.key.height = unit(5, "pt")))
}
Based on the top DE genes, here are my predctions for the cell types represented by each cluster:
For each cluster, here are the DE genes between the two treatments:
DGE_list_Treatment = list()
for(i in sort(unique(alldata@active.ident))){
# select all cells in cluster i
cell_selection <- subset(alldata, cells = colnames(alldata)[alldata@active.ident ==
i])
cell_selection <- SetIdent(cell_selection, value = "Treatment")
maxcells = 50
if(min(table(cell_selection@active.ident))<maxcells){
maxcells = min(table(cell_selection@active.ident))
}
# Compute differentiall expression
DGE_cell_selection <- FindAllMarkers(cell_selection, logfc.threshold = 0.2, test.use = "wilcox",
min.pct = 0.1, only.pos = TRUE, max.cells.per.ident = maxcells,
assay = "RNA")
DGE_list_Treatment[[paste("Cluster",i)]] = DGE_cell_selection[DGE_cell_selection$p_val_adj<0.05,]
}
for(i in sort(unique(alldata@active.ident))){
top20_cell_selection <- DGE_list_Treatment[[paste("Cluster",i)]]
# print(VlnPlot(subset(alldata, idents = i),unique(top20_cell_selection$gene),
# group.by = "orig.ident", assay = "RNA") + theme(axis.text = element_text(size = 1)))
if(nrow(top20_cell_selection)>0){
print(DotPlot(subset(alldata, idents = i), features = rev(as.character(unique(top20_cell_selection$gene))),
assay = "RNA", group.by = "Treatment") + coord_flip() + plottheme +
labs(title = paste("Cluster",i,paste(top3$gene[top3$cluster==i],collapse = ", "))))
}
}